package net.trustie.model;

import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;

import org.apache.commons.codec.digest.DigestUtils;

import net.trustie.utils.DateHandler;
import net.trustie.utils.StringHandler;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import core.AfterExtractor;
import core.Page;
import core.ValidateExtractor;

public class lagou_Model implements AfterExtractor, ValidateExtractor {

	private String post_url = "";
	private String post_id = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/div/text()")
	private String post_time = "";
	
	@ExtractBy("//*dl[@class='job_detail']/dt/h1/allText()")
	private String post_title = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/text()")
	private String post_tags = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[2]/allText()")
	private String post_content = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/span[5]/text()")
	public String work_type = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/span[4]/text()")
	private String degree = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/span[1]/text()")
	private String salary = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl/dd[1]/span[3]/text()")
	private String experience = "";
	@ExtractBy("//*[@id='container']/div[1]/div[2]/dl/dt/a/img/@alt")
	private String comp_name = "";
	@ExtractBy("//*[@id='container']/div[1]/div[2]/dl/dd/ul[1]/li[3]/a/text()")
	private String comp_url = "";
	@ExtractBy("//*[@id='container']/div[1]/div[1]/dl[1]/dd[1]/span[2]/text()")
	private String addr = "";
	@ExtractBy("//*[@id='container']/div[1]/div[2]/dl/dd/ul[1]/li[2]/allText()")
	private String comp_scale = "";
	@ExtractBy("//*[@id='container']/div[1]/div[2]/dl/dd/ul[1]/li[1]/allText()")
	private String comp_field = "";
	@ExtractBy("//*[@id='container']/div[1]/div[2]/dl/dd/ul[2]/li/allText()")
	private String financing = "";
	private String extractTime = "";
	private int history = 0;

	private String source ="neitui";
	private String type="job";
	private String url_md5;
	private String similar_position;
	@ExtractBy("//*ul[@class='guess_like reset']/li/a/@href")
	private List<String> similar_positions;
	public void afterProcess(Page page) {
		this.similar_position = StringHandler.combineTags(this.similar_positions);
		// 处理page_url
		this.post_url = page.getPageUrl();
		this.url_md5 = DigestUtils.md5Hex(this.post_url);
		// 处理post_id
		this.post_id = StringHandler
				.matchRightString(page.getPageUrl(), "\\d+");
		// 处理post_tags
		this.post_tags = "<" + post_tags.replaceAll("职位诱惑 : ", "").trim() + ">";
		// 处理extractTime
		this.extractTime = DateHandler.getExtractTime();
		// 处理post_content
		this.post_content = post_content.replaceAll("职位描述", "");
		// 处理comp_scale
		this.comp_scale = comp_scale.replaceAll("规模 ", "");
		// 处理comp_field
		this.comp_field = comp_field.replaceAll("领域 ", "");
		// 处理financing
		this.financing = financing.replaceAll("目前阶段 ", "");
		// 处理post_time
		this.post_time = post_time.replaceAll("发布时间：", "").replaceAll("发布", "");
		Date date = new Date();

		if (StringHandler.canMatchRightString(post_time, "\\d+:\\d+"))
			this.post_time = extractTime.replaceAll("\\d+:\\d+:\\d+",
					this.post_time + ":00");
		/*
		 * else if(StringHandler.canMatchRightString(post_time, "\\d+\\S+"))
		 * this.post_time=DateHandler.handlerDefaultDate(post_time, date);
		 */
		else
			this.post_time = DateHandler.formatAllTypeDate(post_time, date);
	}

	public void validate(Page page) {
		if (StringHandler.isAtLeastOneBlank(this.post_url, this.post_title,
				this.extractTime)) {
			page.setResultSkip(this, true);
			return;
		}
		if (!DateHandler.canFormatToDate(this.post_time, this.extractTime)) {
			page.setResultSkip(this, true);
		}	
	}

	public String getPost_id() {
		return post_id;
	}

	public void setPost_id(String post_id) {
		this.post_id = post_id;
	}

	public String getPost_url() {
		return post_url;
	}

	public void setPost_url(String post_url) {
		this.post_url = post_url;
	}

	public String getPost_time() {
		return post_time;
	}

	public void setPost_time(String post_time) {
		this.post_time = post_time;
	}

	public String getPost_title() {
		return post_title;
	}

	public void setPost_title(String post_title) {
		this.post_title = post_title;
	}

	public String getPost_tags() {
		return post_tags;
	}

	public void setPost_tags(String post_tags) {
		this.post_tags = post_tags;
	}

	public String getPost_content() {
		return post_content;
	}

	public void setPost_content(String post_content) {
		this.post_content = post_content;
	}

	public String getWork_type() {
		return work_type;
	}

	public void setWork_type(String work_type) {
		this.work_type = work_type;
	}

	public String getDegree() {
		return degree;
	}

	public void setDegree(String degree) {
		this.degree = degree;
	}

	public String getSalary() {
		return salary;
	}

	public void setSalary(String salary) {
		this.salary = salary;
	}

	public String getExperience() {
		return experience;
	}

	public void setExperience(String experience) {
		this.experience = experience;
	}

	public String getComp_name() {
		return comp_name;
	}

	public void setComp_name(String comp_name) {
		this.comp_name = comp_name;
	}

	public String getComp_url() {
		return comp_url;
	}

	public void setComp_url(String comp_url) {
		this.comp_url = comp_url;
	}

	public String getAddr() {
		return addr;
	}

	public void setAddr(String addr) {
		this.addr = addr;
	}

	public String getComp_scale() {
		return comp_scale;
	}

	public void setComp_scale(String comp_scale) {
		this.comp_scale = comp_scale;
	}

	public String getComp_field() {
		return comp_field;
	}

	public void setComp_field(String comp_field) {
		this.comp_field = comp_field;
	}

	public String getFinancing() {
		return financing;
	}

	public void setFinancing(String financing) {
		this.financing = financing;
	}

	public String getExtractTime() {
		return extractTime;
	}

	public void setExtractTime(String extractTime) {
		this.extractTime = extractTime;
	}

	public int getHistory() {
		return history;
	}

	public void setHistory(int history) {
		this.history = history;
	}

}
